# Libraries
library(tidyverse)
library(readxl)
library(ggforce)
library(knitr)
library(forcats)













# Add a numeric tier code for each pitcher's ID label:
# "Great" -> 1, "Decent" -> 2, "Bad" -> 3; any other (or missing) ID -> NA.
# The position of ID in the ordered label vector is the code; as.numeric()
# keeps the column a double rather than the integer that match() returns.
pitcher_test <- pitchers %>%
  mutate(
    group_indicator = as.numeric(match(ID, c("Great", "Decent", "Bad")))
  )
# Correlation between spin rate and run expectancy,
# computed only on rows where both values are present.
pitcher_test %>%
  drop_na(release_spin_rate, run_exp_added) %>%
  with(cor(release_spin_rate, run_exp_added))
## [1] 0.01304765
# Correlation between pitch speed and run expectancy,
# computed only on rows where both values are present.
pitcher_test %>%
  filter(!is.na(pitch_speed), !is.na(run_exp_added)) %>%
  with(cor(pitch_speed, run_exp_added))
## [1] -0.008914642
# Can batted ball type predict run expectancy?
# Fits a linear model of run_exp_added on the categorical bb_type column.
# The fit is printed, not stored. Each bb_type coefficient is an offset from
# the baseline bb_type level, which is absorbed into the intercept.
lm(run_exp_added ~ bb_type, data = pitcher_test)
##
## Call:
## lm(formula = run_exp_added ~ bb_type, data = pitcher_test)
##
## Coefficients:
## (Intercept) bb_typeground_ball bb_typeline_drive bb_typepopup
## -0.1025 0.1706 -0.1393 0.3284
# Intercept = Fly Ball
# Training fit: the same bb_type model, estimated on every pitcher except
# the three whose rows are held out for evaluation.
test_model <- pitcher_test %>%
  filter(
    !player_name %in% c("Scherzer, Max", "Taillon, Jameson", "Berríos, José")
  ) %>%
  lm(formula = run_exp_added ~ bb_type, data = .)
# Held-out evaluation rows: the three excluded pitchers, restricted to rows
# with a recorded batted ball type, keeping only the columns needed to
# compare predictions with the observed run expectancy.
test_testdata <- pitcher_test %>%
  filter(
    player_name %in% c("Scherzer, Max", "Taillon, Jameson", "Berríos, José") &
      !is.na(bb_type)
  ) %>%
  select(ID, pitch_type, run_exp_added, bb_type)
# Show the held-out rows next to the model's predicted run expectancy.
# The result is printed only; nothing is assigned.
mutate(
  test_testdata,
  preds = predict(test_model, newdata = test_testdata)
)
## # A tibble: 1,442 × 5
## ID pitch_type run_exp_added bb_type preds
## <chr> <chr> <dbl> <chr> <dbl>
## 1 Great SL 0.207 ground_ball 0.0784
## 2 Great CU 0.221 ground_ball 0.0784
## 3 Great CH -1.03 fly_ball -0.111
## 4 Great FF 0.181 fly_ball -0.111
## 5 Great FF -1.66 fly_ball -0.111
## 6 Great SL -0.27 fly_ball -0.111
## 7 Great FF 0.206 ground_ball 0.0784
## 8 Great SL -0.183 ground_ball 0.0784
## 9 Great SL 0.406 ground_ball 0.0784
## 10 Great FC -0.752 ground_ball 0.0784
## # ℹ 1,432 more rows
# row.names = FALSE
# Correlation between total break (Euclidean magnitude of the horizontal and
# vertical break components) and rv100, for pitch_type "SL" rows with
# pitch_hand "L".
arsenal %>%
  filter(pitch_type == "SL" & pitch_hand == "L") %>%
  mutate(total_break = sqrt(pitcher_break_x^2 + pitcher_break_z^2)) %>%
  with(cor(total_break, rv100))
## [1] -0.01633807
# Preview Max Scherzer's rows that have a recorded batted ball type.
pitchers %>%
  filter(player_name %in% "Scherzer, Max") %>%
  drop_na(bb_type) %>%
  select(ID, pitch_type, run_exp_added, bb_type)
## # A tibble: 357 × 4
## ID pitch_type run_exp_added bb_type
## <chr> <chr> <dbl> <chr>
## 1 Great SL 0.207 ground_ball
## 2 Great CU 0.221 ground_ball
## 3 Great CH -1.03 fly_ball
## 4 Great FF 0.181 fly_ball
## 5 Great FF -1.66 fly_ball
## 6 Great SL -0.27 fly_ball
## 7 Great FF 0.206 ground_ball
## 8 Great SL -0.183 ground_ball
## 9 Great SL 0.406 ground_ball
## 10 Great FC -0.752 ground_ball
## # ℹ 347 more rows
# Correlation between horizontal movement (pfx_x) and spin rate for
# pitch_type "SL", using only rows where both values are present.
pitchers %>%
  filter(pitch_type == "SL") %>%
  drop_na(pfx_x, release_spin_rate) %>%
  with(cor(pfx_x, release_spin_rate))
## [1] 0.1578424
# Weighted per-pitch-type averages from `arsenal`, formatted for display.
# Every metric uses the same weight, pitch_usage * pitches, computed once.
# NOTE(review): weighting by usage * pitch count is kept from the original;
# confirm this is the intended weight.
avgs <- arsenal %>%
  mutate(avg_weight = pitch_usage * pitches) %>%
  group_by(pitch_name) %>%
  summarize(
    Speed = round(weighted.mean(pitch_speed, avg_weight), 1),
    "Spin Rate" = round(weighted.mean(spin_rate, avg_weight), 0),
    "H. Break (in.)" = round(weighted.mean(pitcher_break_x, avg_weight), 1),
    "V. Break (in.)" = round(weighted.mean(pitcher_break_z, avg_weight), 1),
    wOBA = round(weighted.mean(wOBA, avg_weight), 3),
    "Whiff Rate" = round(weighted.mean(whiff_percent, avg_weight), 1),
    "Hard Hit Rate" = round(weighted.mean(hard_hit_percent, avg_weight), 1)
  ) %>%
  as.data.frame() %>%
  # sprintf() keeps one decimal on every percentage ("44.0%"), whereas
  # paste0() on the rounded number drops a trailing zero ("44%").
  mutate(
    "Whiff Rate" = sprintf("%.1f%%", `Whiff Rate`),
    "Hard Hit Rate" = sprintf("%.1f%%", `Hard Hit Rate`)
  ) %>%
  rename(Pitch = pitch_name)
# Render the per-pitch averages as a table.
kable(avgs)
| Pitch     | Speed | Spin Rate | H. Break (in.) | V. Break (in.) |  wOBA | Whiff Rate | Hard Hit Rate |
|:----------|------:|----------:|---------------:|---------------:|------:|:-----------|:--------------|
| 4-Seamer  |  94.1 |      2280 |            7.4 |           14.6 | 0.340 | 22.1%      | 44%           |
| Changeup  |  85.0 |      1778 |           14.4 |           32.3 | 0.287 | 31.5%      | 31.1%         |
| Curveball |  79.7 |      2548 |            9.1 |           52.9 | 0.277 | 31.9%      | 34.1%         |
| Cutter    |  89.8 |      2386 |            3.1 |           25.3 | 0.323 | 23.9%      | 35.9%         |
| Sinker    |  93.5 |      2133 |           15.1 |           23.6 | 0.353 | 15.2%      | 42.1%         |
| Slider    |  85.1 |      2423 |            5.9 |           36.1 | 0.279 | 35.1%      | 33.3%         |
| Slurve    |  82.3 |      2622 |           15.7 |           42.5 | 0.262 | 27.3%      | 33.8%         |
| Splitter  |  87.2 |      1427 |           11.8 |           32.3 | 0.243 | 36.8%      | 32.5%         |
| Sweeper   |  82.1 |      2626 |           14.7 |           39.5 | 0.258 | 33.9%      | 27.2%         |